In [1]:
import ipykernel
import folium
import pandas as pd
import numpy as np
from scipy import stats
import nvector
import math
import colorlover
from functools import lru_cache

%matplotlib inline

import plotly
plotly.offline.init_notebook_mode()
from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
import cufflinks as cf
cf.set_config_file(offline=True, world_readable=True, theme='pearl')
In [2]:
gps_points = pd.read_csv('./data/sample3.csv')
In [3]:
def extract_time(point):
    try:
        time_str = str(int(point['time_value']))
        time_str = "0"*(8-len(time_str)) + time_str
        datetime_str = str(int(point['date_value'])) + time_str
        return pd.to_datetime(datetime_str, format='%d%m%y%H%M%S%f')
    except Exception as e:
        # print(point, e)
        pass 

def GEO_2_ECEF(lat, lng, alt):
    wgs84 = nvector.FrameE(name='WGS84')
    point = wgs84.GeoPoint(latitude=lat, longitude=lng, z=alt, degrees=True)
    p_EB_E = point.to_ecef_vector()
    return p_EB_E.pvector.ravel()

def GEO_dist(lat, lng, alt, lat_ref, lng_ref, alt_ref):
    wgs84 = nvector.FrameE(name='WGS84')
    point = wgs84.GeoPoint(latitude=lat, longitude=lng, z=alt, degrees=True)
    point_ref = wgs84.GeoPoint(latitude=lat_ref, longitude=lng_ref, z=alt_ref, degrees=True)
    ellipsoidal_dist_12, _, _ = point_ref.distance_and_azimuth(point)
    return ellipsoidal_dist_12

def pre_processing(gps_points):
    # Need more than 10 points to accept a path
    gps_points = gps_points.groupby('id').filter(lambda x: len(x) > 10) 
    # Need more than 3 satellites to accept a point 
    gps_points = gps_points[gps_points['satellites_value'] > 3]
    
    gps_points['time'] = gps_points.apply(extract_time, axis=1)
    # Remove Error
    gps_points = gps_points[gps_points['time'] > pd.Timestamp('2016-01-01')]

    @lru_cache(maxsize=None)
    def get_center(path_id):
        path = gps_points[gps_points.id==path_id]
        return path[path.satellites_value >= 6].mean()[['location_lat', 'location_lng', 'altitude_value']]
    
    center = gps_points['id'].apply(get_center) 
    center.columns = ['center_lat', 'center_lng', 'center_alt']
    gps_points = gps_points.join(center)
    
    def get_distance(row, flat=False):
        return GEO_dist(row['location_lat'],
                        row['location_lng'],
                        0 if flat else row['altitude_value'],
                        row['center_lat'],
                        row['center_lng'],
                        0 if flat else  row['center_alt'])
    
    gps_points['distance'] = gps_points.apply(get_distance, axis=1)
    gps_points['distance_2D'] = gps_points.apply(get_distance, axis=1, flat=True)

    gps_points['delta_lat'] = gps_points['location_lat'] - gps_points['center_lat']
    gps_points['delta_lng'] = gps_points['location_lng'] - gps_points['center_lng']
    gps_points['delta_alt'] = gps_points['altitude_value'] - gps_points['center_alt']

    return gps_points

gps_points = pre_processing(gps_points)
In [4]:
groupedby = gps_points.groupby('satellites_value')
In [5]:
# https://plot.ly/~ben_derv/8/number-of-extracted-positions-by-satellites/
    
groupedby.size().iplot(
    kind='bar', 
    title="Number of extracted positions by satellites", 
    yTitle="Number of extracted positions",
    xTitle="Number of satellites",
    dimensions=(500,400)
)
In [6]:
# https://plot.ly/~ben_derv/12/distribution-of-recorded-positions/

def draw_points_by_satellites(path):
    iplot({
        'data': [
            {
                'x': path[path['satellites_value']==sat]['location_lng'],
                'y': path[path['satellites_value']==sat]['location_lat'],
                'name': sat, 
                'mode': 'markers',
            } for sat in range(3, path['satellites_value'].max()+1)
        ],
        'layout': {
            'xaxis': {'title': 'Longitude'},
            'yaxis': {'title': 'Latitude'},
            'title': 'Distribution of recorded positions'
        }
    })

draw_points_by_satellites(gps_points[gps_points.id==8])
In [12]:
# https://plot.ly/~ben_derv/4/cumulated-of-gps-record-by-distance/
def draw_distributed_cumsum(path):
    def distributed_cumsum(path):
        values, base = np.histogram(path['distance'], bins=40)
        cumulative = np.cumsum(values)
        return cumulative/cumulative[-1], base


    iplot({
        'data': [{
                'y': distributed_cumsum(path[path['satellites_value'] == sat])[0],
                'x': distributed_cumsum(path[path['satellites_value'] == sat])[1],
                'name': sat, 
            } for sat in range(3, path['satellites_value'].max()+1)
        ],
        'layout': {
            'xaxis': {'title': 'Distance from center (meters)'},
            'yaxis': {'title': '% cumulated of positions recorded'},
            'title': 'Cumulated % of GPS record by distance '
        }
    })

draw_distributed_cumsum(gps_points)
In [13]:
# https://plot.ly/~ben_derv/12/distribution-of-recorded-positions/
    
def draw_2_sma_satellites_value_by_time(path):
    path_time = path.set_index(path['time']).sort_index()
    path_time = path_time.iloc[::10, :] # if too many points
    path_time['satellites_value'].ta_plot(study='sma',
                                          periods=[100, 10],
                                          include=True,
                                          title='Satellites values by time (Simple Moving Averages)',
                                          mode='markers',
                                          size=1)

draw_2_sma_satellites_value_by_time(gps_points)
In [16]:
# https://plot.ly/~ben_derv/14/lat-vs-lng/

def draw_2d_distribution(path):
    scl = colorlover.scales['9']['seq']['Blues']
    colorscale = [ [ float(i)/float(len(scl)-1), scl[i] ] for i in range(len(scl)) ]

    path = path[path['distance'] < 20]

    x_grid = np.linspace(
        path['location_lng'].min(),
        path['location_lng'].max(),
        100)
    y_grid = np.linspace(
        path['location_lat'].min(),
        path['location_lat'].max(),
        100)

    def kde_scipy(x, x_grid, bandwidth=0.2 ):
        kde = stats.gaussian_kde(x, bw_method=bandwidth / x.std(ddof=1) )
        return kde.evaluate(x_grid)

    trace1 = Histogram2dContour(
        x=path['location_lng'],
        y=path['location_lat'],
        name='density',
        ncontours=20,
        colorscale=colorscale,
        showscale=False
    )
    trace2 = Histogram(
        x=path['location_lng'],
        name='x density',
        yaxis='y2',
        histnorm='probability density',
        marker=dict(color='rgb(217, 217, 217)'),
        nbinsx=25
    )
    trace2s = Scatter(
        x=x_grid,
        y=kde_scipy( path['location_lng'].as_matrix(), x_grid ),
        yaxis='y2',
        line = dict( color='rgb(31, 119, 180)' ),
        fill='tonexty',
    )
    trace3 = Histogram(
        y=path['location_lat'],
        name='y density',
        xaxis='x2',
        histnorm='probability density',
        marker=dict(color='rgb(217, 217, 217)'),
        nbinsy=50
    )
    trace3s = Scatter(
        y=y_grid,
        x=kde_scipy( path['location_lat'].as_matrix(), y_grid ),
        xaxis='x2',
        line = dict( color='rgb(31, 119, 180)' ),
        fill='tonextx',
    )

    data = [trace1, trace2, trace2s, trace3, trace3s]

    layout = Layout(
        showlegend=False,
        autosize=False,
        width=700,
        height=700,
        hovermode='closest',
        bargap=0,

        xaxis=dict(domain=[0, 0.746], linewidth=2, linecolor='#444', title='Lng',
                   showgrid=False, zeroline=False, ticks='', showline=True, mirror=True),

        yaxis=dict(domain=[0, 0.746],linewidth=2, linecolor='#444', title='Lat',
                   showgrid=False, zeroline=False, ticks='', showline=True, mirror=True),

        xaxis2=dict(domain=[0.75, 1], showgrid=False, zeroline=False, ticks='',
                    showticklabels=False ),

        yaxis2=dict(domain=[0.75, 1], showgrid=False, zeroline=False, ticks='',
                    showticklabels=False ),
    )

    iplot(Figure(data=data, layout=layout))

draw_2d_distribution(gps_points)
In [17]:
#https://plot.ly/22/~ben_derv/#

groupedby.mean()['hdop_value'].iplot(
    kind='bar', 
    title="HDOP average by number of satellites", 
    xTitle="Number of satellites",
    yTitle='HDOP',
    dimensions=(800,500)
)
In [35]:
# LATITUDE

groupedby['delta_lat'].mean().iplot(
    kind='bar', 
    title="Average latitude distance from center by satellites", 
    xTitle='Number of satellites',
    yTitle='Latitude distance',
    dimensions=(500,400)
)

groupedby['delta_lat'].std().iplot(
    kind='bar', 
    title="Standard deviation of latitude distance from center by satellites", 
    xTitle='Number of satellites',
    yTitle='Latitude distance',
    dimensions=(500,400)
)
In [36]:
# LATITUDE

groupedby['delta_lng'].mean().iplot(
    kind='bar', 
    title="Average longitude distance from center by satellites", 
    xTitle='Number of satellites',
    yTitle='Longitude distance',
    dimensions=(500,400)
)

groupedby['delta_lng'].std().iplot(
    kind='bar', 
    title="STD of longitude distance from center by satellites", 
    xTitle='Number of satellites',
    yTitle='Longitude distance',
    dimensions=(500,400)
)
In [57]:
groupedby['delta_alt'].mean().iplot(
   kind='bar', 
   title="Altitude mean by number of satellites (meters)", 
   xTitle='Number of satellites',
   yTitle='Altitude distance',
   dimensions=(500,400)
)
groupedby['delta_alt'].std().iplot(
   kind='bar', 
   title="Altitude std by number of satellites (meters)", 
   xTitle='Number of satellites',
   yTitle='Altitude distance',
   dimensions=(500,400)
)
In [55]:
gps_points = pd.read_csv('./data/sample.csv', sep=',')

def create_map(data, filename='gps.html', max_points=100):
    center = [data['location_lat'].mean(), data['location_lng'].mean()]
    map_osm = folium.Map(location=center, zoom_start=18)
    for _, record in data.iterrows():
        if _ > max_points: 
            print('WARNING: DATASET TOO LARGE')
            break
        map_osm.add_children(folium.Marker(location=[record['location_lat'], record['location_lng']], popup=str(record['id'])))
    map_osm.save(outfile=filename)
    
path = gps_points[gps_points.id==452]
create_map(path)
In [54]:
%%HTML
<iframe width=100% height=500 src="gps.html"></iframe>
In [56]:
def linear_regression(path):
    slope, intercept, r_value, p_value, std_err = stats.linregress(path['location_lat'], path['location_lng'])
    line = slope*path['location_lat'] + intercept

    data = Data([{
        'x': path['location_lat'],
        'y': path['location_lng'],
        'mode': 'markers',
        'name':'Data'
    }, {
        'y': line, 
        'x': path['location_lat'],
        'name':'Linear Fit'
    }])
    layout = Layout(
        annotations = [Annotation(
            x=path['location_lat'].mean(),
            y=path['location_lng'].mean()+3*path['location_lng'].std(),
            text='r_value = {}'.format(r_value),
            showarrow=False,
            font=Font(size=16)
        )]
    )
    iplot(Figure(data=data, layout=layout))

linear_regression(path)
In [59]:
# https://plot.ly/~ben_derv/0/gps/
# This is a special record in 'mode 1', which record every 5 meters. It allow us to 'investigate' 
# the noise received by the satellites

gps_points = pd.read_csv('./data/sample2.csv', sep=';')
draw_points_by_satellites(gps_points)
In [ ]: